if (!require("dplyr")) {install.packages("dplyr"); require("dplyr")}
if (!require("Seurat")) {install.packages("Seurat"); require("Seurat")}
if (!require("SeuratDisk")) {install.packages("SeuratDisk"); require("SeuratDisk")}
if (!require("knitr")) {install.packages("knitr"); require("knitr")}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!requireNamespace('BiocManager', quietly = TRUE)) {install.packages('BiocManager'); require("BiocManager")}
if (!require("here")) {install.packages("here"); require("here")}
if (!require("paletteer")) {install.packages("paletteer"); require("paletteer")} # color palette
if (!require("grDevices")) {install.packages("grDevices"); require("grDevices")} # for grDevices palette
if (!require("tidyverse")) {install.packages("tidyverse"); require("tidyverse")} # for data frame transformation
if (!require("tibble")) {install.packages("tibble"); require("tibble")} # for table transformation
if (!require("geneName")) {install.packages("geneName"); require("geneName")}
if (!require("ggrepel")) {install.packages("ggrepel"); require("ggrepel")}
if (!require("gghighlight")) {install.packages("gghighlight"); require("gghighlight")}
if (!require("ggpmisc")) {install.packages("ggpmisc"); require("ggpmisc")}
if (!require("ggupset")) {install.packages("ggupset"); require("ggupset")}
if (!require("RColorBrewer")) {install.packages("RColorBrewer"); require("RColorBrewer")}
if (!require("viridis")) {install.packages("viridis"); require("viridis")}
library(SeuratData)
library(openxlsx)
library(gplots)
library(ggvenn)This file was made to explore the expression of KRT19 in TAL snRNASeq data from the KPMP 2021 snRNAseq dataset for Vidhi Dalal.
This is the KPMP object that was originally formatted by Xiao-Tong Su (manually annotated meta-data) and then subsetted by Jessica Bahena Lopez for her TAL heterogenity manuscript.
KPMP.TAL@meta.data$subclass.l2 <- factor(KPMP.TAL@meta.data$subclass.l2, levels = c("C-TAL", "M-TAL", "aTAL1", "aTAL2", "dC-TAL", "dM-TAL", "MD"))
DimPlot(KPMP.TAL, group.by = "subclass.l1")## An object of class Seurat
## 29732 features across 35424 samples within 1 assay
## Active assay: RNA (29732 features, 5526 variable features)
## 3 layers present: counts, data, scale.data
## 3 dimensional reductions calculated: pca, tsne, umap
gene_list <- list(c("SLC12A1", "CRYAB", "KRT19", "MMP7", "CLU", "SPP1"))
gene_unlist <- unlist(c("SLC12A1", "CRYAB", "KRT19", "MMP7", "CLU", "SPP1"))
VlnPlot(KPMP.TAL, features = gene_unlist, group.by = "subclass.l2", pt.size = 0.1)# Create a dataframe with the average expression for the list of genes
df <- AverageExpression(object = KPMP.TAL, features = gene_unlist, group.by = 'subclass.l2')$RNA
df## C-TAL M-TAL aTAL1 aTAL2 dC-TAL dM-TAL
## SLC12A1 64.6614275 78.7514259 15.6880536 20.4932148 14.816813 42.571376
## CRYAB 0.1801357 0.3792102 0.5580475 0.5916533 9.150091 1.314851
## KRT19 0.1646312 0.3307886 0.6143769 0.4947854 5.244709 2.094962
## MMP7 0.1497348 0.1068843 1.0871972 1.7900269 15.009163 1.037691
## CLU 0.2266637 0.3040202 0.7711453 1.0898689 13.479982 2.338266
## SPP1 2.9975088 1.4675743 8.1901007 23.4953085 104.092847 19.745206
## MD
## SLC12A1 44.4480911
## CRYAB 0.1844493
## KRT19 0.2642673
## MMP7 0.1830210
## CLU 0.2682441
## SPP1 4.1327308
# Calculate Z score for each row (could skip if you want raw expression)
df <- t(scale(t(df)))
# convert df to tidy format
df_tidy <- df %>%
as.data.frame() %>%
rownames_to_column(var = "Gene") %>%
pivot_longer(cols = -Gene, names_to = "subclass.l2", values_to = "Expression")
# Graph with geom_tile
f1 <- ggplot(df_tidy, aes(x = subclass.l2, y = Gene, fill = Expression)) +
geom_tile()
f1# Make pretty with ggplot2
f2 <- ggplot(df_tidy, aes(x = subclass.l2, y = Gene, fill = Expression)) +
geom_tile() +
scale_fill_distiller(palette = "RdYlBu") +
theme_minimal()
f2KPMP.TAL@meta.data$Enrollment.Category <- factor(KPMP.TAL@meta.data$Enrollment.Category, levels = c("Healthy Reference", "AKI", "CKD"))
DimPlot(KPMP.TAL, group.by = "subclass.l2", label = T, split.by = "Enrollment.Category")KPMP.TAL@meta.data$Proteinuria..mg...Binned. <- factor(KPMP.TAL@meta.data$Proteinuria..mg...Binned., levels = c("<150 mg/g cr", "150 to <500 mg/g cr", "500 to <1000 mg/g cr", ">=1000 mg/g cr", ""))
DimPlot(KPMP.TAL, group.by = "subclass.l2", label = T, split.by = "Proteinuria..mg...Binned.")DotPlot(
KPMP.TAL,
assay = NULL,
features = "KRT19",
cols = c("lightgrey", "blue"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
idents = NULL,
group.by = "Enrollment.Category",
split.by = NULL,
cluster.idents = FALSE,
scale = T,
scale.by = "radius",
scale.min = NA,
scale.max = NA
)DotPlot(
KPMP.TAL,
assay = NULL,
features = "KRT19",
cols = c("lightgrey", "blue"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
idents = NULL,
group.by = "subclass.l2",
split.by = NULL,
cluster.idents = FALSE,
scale = T,
scale.by = "radius",
scale.min = NA,
scale.max = NA
)DotPlot(
KPMP.TAL,
assay = NULL,
features = "KRT19",
cols = c("lightgrey", "blue"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
idents = NULL,
group.by = "Enrollment.Category",
split.by = NULL,
cluster.idents = FALSE,
scale = F,
scale.by = "radius",
scale.min = NA,
scale.max = NA
)DotPlot(
KPMP.TAL,
assay = NULL,
features = "KRT19",
cols = c("lightgrey", "blue"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
idents = NULL,
group.by = "subclass.l2",
split.by = NULL,
cluster.idents = FALSE,
scale = F,
scale.by = "radius",
scale.min = NA,
scale.max = NA
)KPMP.TAL@meta.data$KRT19status <- ifelse(GetAssayData(KPMP.TAL, assay = "RNA", slot = "data")["KRT19", ] > 0, "yes", "no")
DimPlot(KPMP.TAL, group.by = "KRT19status", label = T)VD.DEGs <- read.xlsx(here("DEG_NewTAL_vs_TAL.xlsx"))
# change x1 to "gene"
VD.DEGs <- VD.DEGs %>% rename(gene = X1)
x.markers <- VD.DEGs
x.markers_tb <- x.markers %>% data.frame() %>% filter(p_val_adj < 0.05) %>% filter(abs(avg_log2FC) > .5) %>% as_tibble()
top6 <- list(head(x.markers_tb$gene))
x.markers_tb_H <- mousegnameConverter(x.markers_tb, "gene")## ================================================================================
x.markers_tb_H = x.markers_tb_H[order(x.markers_tb_H[,"avg_log2FC"], decreasing = TRUE),]
x.markers_tb_H# move the KRT19.DEG rownames to be a column called "gene"
y.markers <- KRT19.DEGs
y.markers_tb <- y.markers %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
filter(p_val_adj < 0.05) %>%
filter(abs(avg_log2FC) > .5) %>%
as_tibble()
#X-Y DEGs Intersection Table
xy.comp.TAL <- inner_join(x.markers_tb_H, y.markers_tb, by = "gene")
xy.comp.TAL #Set Range for Far Right Data Points
df.upper <- subset(xy.comp.TAL, avg_log2FC.x > -.32 & avg_log2FC.y > -.32)
#Set Range for Far Left Data Points
df.lower <- subset(xy.comp.TAL, avg_log2FC.x < 0.32 & avg_log2FC.y < .32)
xy.comp.TAL.plot <- ggplot(xy.comp.TAL, aes(x = avg_log2FC.x, y = avg_log2FC.y, label=gene)) +
theme_classic() +
geom_point(
color=dplyr::case_when(
(xy.comp.TAL$avg_log2FC.x > 1 & xy.comp.TAL$avg_log2FC.y > 1) ~ "#1b9e77", #sets color for df.upper points
(xy.comp.TAL$avg_log2FC.x < -1 & xy.comp.TAL$avg_log2FC.y < -1) ~ "#d95f02", #sets color for df.lower points
TRUE ~ "black")) +
geom_text_repel(data=rbind(df.upper, df.lower),
segment.sixy.comp.TALe = 0.2, #<--!! what is this?? !!--
segment.color = "grey50") +
geom_smooth (method=lm) +
labs(
title = paste("Correlation of Log2FC Values of DEGs from",
"Mouse New TAL", "and",
"KPMP KRT19+ Cells", sep = " "),
x = paste("Average log2FC", "Mouse"),
y = paste("Average log2FC", "KPMP")
) +
stat_poly_eq(aes(label = paste(..rr.label.., sep = "~~~")),
label.x.npc = "left", label.y.npc = 0.90, #set the position of the eq
rr.digits = 3)
print(xy.comp.TAL.plot)## [1] 54
cor_result <- cor.test(xy.comp.TAL$avg_log2FC.x, xy.comp.TAL$avg_log2FC.y, method = "pearson")
cor_result##
## Pearson's product-moment correlation
##
## data: xy.comp.TAL$avg_log2FC.x and xy.comp.TAL$avg_log2FC.y
## t = 6.546, df = 52, p-value = 2.619e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4931235 0.7965416
## sample estimates:
## cor
## 0.6721359
## [1] "2024-12-08 12:07:08 PST"
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] fastmatch_1.1-4 ggvenn_0.1.10 gplots_3.2.0
## [4] openxlsx_4.2.5.2 SeuratData_0.2.2.9001 viridis_0.6.4
## [7] viridisLite_0.4.2 RColorBrewer_1.1-3 ggupset_0.3.0
## [10] ggpmisc_0.5.4-1 ggpp_0.5.4 gghighlight_0.4.0
## [13] ggrepel_0.9.5 geneName_0.2.3 lubridate_1.9.2
## [16] forcats_1.0.0 stringr_1.5.1 purrr_1.0.2
## [19] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [22] tidyverse_2.0.0 paletteer_1.5.0 here_1.0.1
## [25] ggplot2_3.5.1 knitr_1.45 SeuratDisk_0.0.0.9021
## [28] SeuratObject_5.0.1 Seurat_4.4.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.3.1 later_1.3.1
## [4] bitops_1.0-7 polyclip_1.10-6 confintr_1.0.2
## [7] lifecycle_1.0.4 rprojroot_2.0.4 globals_0.16.2
## [10] lattice_0.21-8 hdf5r_1.3.8 MASS_7.3-60
## [13] magrittr_2.0.3 limma_3.56.2 plotly_4.10.4
## [16] sass_0.4.9 rmarkdown_2.25 jquerylib_0.1.4
## [19] yaml_2.3.7 httpuv_1.6.11 sctransform_0.4.1
## [22] spam_2.10-0 zip_2.3.0 sp_2.1-3
## [25] spatstat.sparse_3.0-3 reticulate_1.34.0 cowplot_1.1.3
## [28] pbapply_1.7-2 abind_1.4-5 Rtsne_0.17
## [31] rappdirs_0.3.3 irlba_2.3.5.1 listenv_0.9.1
## [34] spatstat.utils_3.0-4 MatrixModels_0.5-3 goftest_1.2-3
## [37] spatstat.random_3.2-2 fitdistrplus_1.1-11 parallelly_1.36.0
## [40] leiden_0.4.3.1 codetools_0.2-19 tidyselect_1.2.0
## [43] farver_2.1.1 matrixStats_1.2.0 spatstat.explore_3.2-5
## [46] jsonlite_1.8.8 ellipsis_0.3.2 progressr_0.14.0
## [49] ggridges_0.5.6 survival_3.5-5 tools_4.3.1
## [52] ica_1.0-3 Rcpp_1.0.12 glue_1.7.0
## [55] gridExtra_2.3 mgcv_1.8-42 xfun_0.40
## [58] withr_3.0.0 BiocManager_1.30.22 fastmap_1.1.1
## [61] fansi_1.0.4 SparseM_1.81 caTools_1.18.2
## [64] digest_0.6.33 timechange_0.2.0 R6_2.5.1
## [67] mime_0.12 colorspace_2.1-0 scattermore_1.2
## [70] gtools_3.9.5 tensor_1.5 spatstat.data_3.0-4
## [73] utf8_1.2.3 generics_0.1.3 data.table_1.14.10
## [76] httr_1.4.7 htmlwidgets_1.6.2 uwot_0.1.16
## [79] pkgconfig_2.0.3 gtable_0.3.4 lmtest_0.9-40
## [82] htmltools_0.5.8.1 dotCall64_1.1-1 scales_1.3.0
## [85] png_0.1-8 rstudioapi_0.15.0 tzdb_0.4.0
## [88] reshape2_1.4.4 nlme_3.1-162 cachem_1.0.8
## [91] zoo_1.8-12 KernSmooth_2.23-21 vipor_0.4.5
## [94] parallel_4.3.1 miniUI_0.1.1.1 ggrastr_1.0.2
## [97] pillar_1.9.0 vctrs_0.6.5 RANN_2.6.1
## [100] promises_1.2.1 xtable_1.8-4 cluster_2.1.4
## [103] beeswarm_0.4.0 evaluate_0.23 cli_3.6.1
## [106] compiler_4.3.1 rlang_1.1.1 crayon_1.5.2
## [109] future.apply_1.11.1 labeling_0.4.3 rematch2_2.1.2
## [112] ggbeeswarm_0.7.2 plyr_1.8.9 stringi_1.7.12
## [115] deldir_2.0-2 munsell_0.5.0 lazyeval_0.2.2
## [118] spatstat.geom_3.2-8 quantreg_5.97 Matrix_1.6-5
## [121] hms_1.1.3 patchwork_1.2.0 bit64_4.0.5
## [124] future_1.33.1 shiny_1.8.0 highr_0.10
## [127] ROCR_1.0-11 igraph_1.6.0 bslib_0.8.0
## [130] bit_4.0.5 polynom_1.4-1